Load R libraries
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(mice)
## Loading required package: lattice
library(ggplot2)
library(lattice)
Load the vehicle data set (records with a missing target have already been removed).
# NOTE(review): hard-coded, machine-specific working directory. Every relative
# path in this script (the input CSV below and the output CSV written at the
# end) is resolved against it.
setwd("/Users/davidleonardi/Projects/KE5107_BayesianNetworks/")
data <- read.csv(file = "./vehicle_safety_remove_missing_target.csv")

# Columns that hold codes/categories rather than measurements; they are
# re-encoded as factors.
# NOTE(review): the summary output lists a blank level for OA_SEX (201 rows)
# and VE_GAD1 (771 rows). Those blanks are kept as a factor level, not NA, so
# they are not counted as missing below -- confirm whether they should be read
# as missing (e.g. via na.strings).
factor_cols <- c(
  "GV_LANES", "GV_MODELYR", "GV_WGTCDTR", "OA_BAGDEPLY",
  "OA_MAIS", "OA_MANUSE", "OA_SEX", "VE_GAD1"
)
for (col_name in factor_cols) {
  data[[col_name]] <- as.factor(data[[col_name]])
}

# Per-column overview, including the NA count of each variable.
summary(data)
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY
## Min. : 670 Min. :-114.000 Min. :-145.00 Min. : 4.0
## 1st Qu.:1360 1st Qu.: -6.000 1st Qu.: -23.00 1st Qu.: 155.0
## Median :1530 Median : 0.000 Median : -15.00 Median : 305.0
## Mean :1618 Mean : 0.113 Mean : -14.75 Mean : 501.5
## 3rd Qu.:1830 3rd Qu.: 7.000 3rd Qu.: -8.00 3rd Qu.: 598.0
## Max. :4310 Max. : 118.000 Max. : 84.00 Max. :9852.0
## NA's :39 NA's :5877 NA's :5877 NA's :5877
## GV_LANES GV_MODELYR GV_OTVEHWGT GV_SPLIMIT
## 2 :7347 2000 :2670 Min. : 640 Min. : 0.00
## 4 :3922 2002 :2659 1st Qu.:1340 1st Qu.:35.00
## 3 :3520 2001 :2577 Median :1550 Median :40.00
## 5 :2981 2003 :2348 Mean :1630 Mean :40.72
## 6 : 651 2004 :2048 3rd Qu.:1840 3rd Qu.:45.00
## (Other): 779 2005 :1896 Max. :4540 Max. :75.00
## NA's : 3 (Other):5005 NA's :1992 NA's :221
## GV_WGTCDTR OA_AGE OA_BAGDEPLY
## Passenger Car :11800 Min. : 0.00 Deployed : 9017
## Truck (<=10000 lbs.): 2414 1st Qu.:25.00 Not Deployed:10186
## Truck (<=6000 lbs.) : 4989 Median :37.00
## Mean :40.23
## 3rd Qu.:52.00
## Max. :97.00
## NA's :14
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT
## Min. : 59.0 0:7319 0 : 2175 : 201 Min. : 31.0
## 1st Qu.:163.0 1:8814 1 :16643 Female:9453 1st Qu.: 64.0
## Median :170.0 2:1517 NA's: 385 Male :9549 Median : 77.0
## Mean :170.8 3: 950 Mean : 78.8
## 3rd Qu.:178.0 4: 301 3rd Qu.: 91.0
## Max. :216.0 5: 217 Max. :150.0
## NA's :2116 6: 85 NA's :2008
## VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## : 771 Min. :105.0 Min. :141.0 Min. : 5.0
## Front:11176 1st Qu.:149.0 1st Qu.:262.0 1st Qu.:115.0
## Left : 2997 Median :154.0 Median :272.0 Median :135.0
## Rear : 1629 Mean :154.8 Mean :281.1 Mean :152.2
## Right: 2630 3rd Qu.:158.0 3rd Qu.:288.0 3rd Qu.:175.0
## Max. :185.0 Max. :481.0 Max. :355.0
## NA's :219 NA's :8 NA's :1864
## GV_FOOTPRINT
## Min. :2.468
## 1st Qu.:3.925
## Median :4.200
## Mean :4.364
## 3rd Qu.:4.554
## Max. :7.795
## NA's :223
Show missing data in diagram.
# Missingness map over every variable. The data frame is read without an index
# column, so no column is skipped: the previous `data[-1]` dropped the first
# variable (GV_CURBWGT, which has 39 missing values) from the map.
missmap(data, col = c("grey", "steelblue"), y.cex = 0.5, x.cex = 0.8)
Get hard numbers of missing data.
# Number of missing values per variable, largest first.
# vapply() pins the result to one integer per column.
na_counts <- vapply(data, function(column) sum(is.na(column)), integer(1))
sort(na_counts, decreasing = TRUE)
## GV_DVLAT GV_DVLONG GV_ENERGY OA_HEIGHT OA_WEIGHT
## 5877 5877 5877 2116 2008
## GV_OTVEHWGT VE_PDOF_TR OA_MANUSE GV_FOOTPRINT GV_SPLIMIT
## 1992 1864 385 223 221
## VE_ORIGAVTW GV_CURBWGT OA_AGE VE_WHEELBAS GV_LANES
## 219 39 14 8 3
## GV_MODELYR GV_WGTCDTR OA_BAGDEPLY OA_MAIS OA_SEX
## 0 0 0 0 0
## VE_GAD1
## 0
Show missing data pattern.
# Tabulate the distinct missing-data patterns (1 = observed, 0 = missing).
# Row labels are the number of records sharing a pattern; the last column is
# the number of missing variables in that pattern and the last row the number
# of missing values per variable.
mice::md.pattern(data)
## GV_MODELYR GV_WGTCDTR OA_BAGDEPLY OA_MAIS OA_SEX VE_GAD1 GV_LANES
## 10930 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 0
## 250 1 1 1 1 1 1 1
## 97 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 138 1 1 1 1 1 1 1
## 51 1 1 1 1 1 1 1
## 64 1 1 1 1 1 1 1
## 385 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1122 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 77 1 1 1 1 1 1 1
## 2436 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 48 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 15 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 0
## 1221 1 1 1 1 1 1 1
## 35 1 1 1 1 1 1 1
## 30 1 1 1 1 1 1 1
## 69 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 969 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 15 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1
## 22 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 249 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 210 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 41 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 152 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 68 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 80 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 0 0 0 0 0 0 3
## VE_WHEELBAS OA_AGE GV_CURBWGT VE_ORIGAVTW GV_SPLIMIT GV_FOOTPRINT
## 10930 1 1 1 1 1 1
## 5 1 1 0 1 1 1
## 2 1 1 1 1 1 1
## 250 1 1 1 1 1 1
## 97 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 138 1 1 1 1 1 1
## 51 1 1 1 1 1 1
## 64 1 1 1 1 1 1
## 385 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 4 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 2 1 1 1 1 1 1
## 4 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 1122 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 7 1 1 1 1 0 1
## 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 7 1 1 1 1 1 1
## 77 1 1 1 0 1 0
## 2436 1 1 1 1 1 1
## 25 1 1 1 1 1 1
## 20 1 1 1 1 0 1
## 5 1 0 1 1 1 1
## 48 1 1 1 1 1 1
## 24 1 1 1 1 1 1
## 4 1 1 1 0 1 0
## 1 1 1 1 0 0 0
## 3 1 1 1 0 1 0
## 2 1 1 1 0 1 0
## 1 0 1 0 1 1 0
## 8 1 1 1 0 1 0
## 15 1 1 0 1 1 1
## 1 1 1 1 1 1 1
## 1221 1 1 1 1 1 1
## 35 1 1 1 1 0 1
## 30 1 1 1 1 1 1
## 69 1 1 1 1 1 1
## 13 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 969 1 1 1 1 1 1
## 4 1 1 1 1 1 1
## 15 1 1 1 0 1 0
## 3 1 1 0 1 1 1
## 2 1 1 0 1 0 1
## 8 1 1 1 1 0 1
## 16 1 1 1 1 1 1
## 22 1 1 1 1 1 1
## 3 1 1 1 1 0 1
## 7 1 1 1 1 1 1
## 1 1 1 0 1 1 1
## 8 1 1 1 1 1 1
## 249 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 4 1 1 0 1 1 1
## 210 1 1 1 1 1 1
## 25 1 1 1 1 0 1
## 4 1 1 1 1 1 1
## 24 1 1 1 1 1 1
## 6 1 1 1 1 1 1
## 41 1 1 1 0 1 0
## 1 1 0 1 0 1 0
## 2 1 1 1 0 1 0
## 2 0 1 1 1 1 0
## 2 1 1 0 1 1 1
## 152 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 3 1 0 1 1 1 1
## 68 1 1 1 1 1 1
## 1 1 1 0 1 1 1
## 2 1 1 1 1 0 1
## 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 2 1 1 1 1 1 1
## 80 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 12 1 1 1 0 1 0
## 1 1 1 1 0 0 0
## 3 1 1 1 0 1 0
## 1 0 1 0 1 1 0
## 1 0 1 1 0 1 0
## 20 1 1 1 0 1 0
## 1 1 1 1 0 1 0
## 1 1 1 0 1 1 1
## 12 1 1 1 1 1 1
## 4 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 1 1 1 1 1 0 1
## 1 1 1 0 1 1 1
## 16 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 25 1 1 1 1 1 1
## 1 1 1 1 0 0 0
## 3 1 1 1 0 1 0
## 1 0 1 0 0 1 0
## 1 0 1 1 0 1 0
## 8 1 1 1 0 1 0
## 1 1 1 1 0 0 0
## 4 1 1 1 0 1 0
## 3 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 2 1 1 1 0 1 0
## 1 1 1 0 0 1 0
## 1 1 1 1 0 1 0
## 1 1 1 1 1 0 1
## 1 1 1 1 0 1 0
## 1 0 1 1 0 1 0
## 2 1 1 1 0 1 0
## 8 14 39 219 221 223
## OA_MANUSE VE_PDOF_TR GV_OTVEHWGT OA_WEIGHT OA_HEIGHT GV_DVLAT
## 10930 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 2 1 1 1 1 1 1
## 250 1 1 0 1 1 1
## 97 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 138 1 1 1 1 0 1
## 51 0 1 1 1 1 1
## 64 1 1 1 0 1 1
## 385 1 0 1 1 1 1
## 1 1 1 0 1 1 1
## 1 1 1 0 1 1 1
## 4 1 1 0 1 0 1
## 1 1 1 1 1 0 1
## 2 0 1 0 1 1 1
## 4 0 1 1 1 0 1
## 2 1 1 1 0 1 1
## 1122 1 1 1 0 0 1
## 1 0 1 1 0 1 1
## 1 1 0 0 1 1 1
## 7 1 0 1 1 1 1
## 1 1 0 1 1 0 1
## 5 0 0 1 1 1 1
## 7 1 0 1 0 1 1
## 77 1 1 1 1 1 1
## 2436 1 1 1 1 1 0
## 25 1 1 0 0 0 1
## 20 1 1 1 0 0 1
## 5 1 1 1 0 0 1
## 48 0 1 1 0 0 1
## 24 1 0 1 0 0 1
## 4 1 1 0 1 1 1
## 1 1 1 1 1 1 1
## 3 1 1 1 1 0 1
## 2 0 1 1 1 1 1
## 1 1 1 1 1 1 1
## 8 1 0 1 1 1 1
## 15 1 1 1 1 1 0
## 1 1 1 1 1 1 0
## 1221 1 1 0 1 1 0
## 35 1 1 1 1 1 0
## 30 1 1 1 1 0 0
## 69 0 1 1 1 1 0
## 13 1 1 1 0 1 0
## 2 0 1 1 0 0 1
## 1 0 1 1 0 0 1
## 969 1 0 1 1 1 0
## 4 0 0 1 0 0 1
## 15 1 1 1 0 0 1
## 3 1 1 0 1 1 0
## 2 1 1 1 1 1 0
## 8 1 1 0 1 1 0
## 16 1 1 0 1 0 0
## 22 0 1 0 1 1 0
## 3 0 1 1 1 1 0
## 7 0 1 1 1 0 0
## 1 1 1 1 0 1 0
## 8 1 1 0 0 1 0
## 249 1 1 1 0 0 0
## 1 0 1 1 0 1 0
## 4 1 0 1 1 1 0
## 210 1 0 0 1 1 0
## 25 1 0 1 1 1 0
## 4 1 0 1 1 0 0
## 24 0 0 1 1 1 0
## 6 1 0 1 0 1 0
## 41 1 1 1 1 1 0
## 1 1 1 1 0 0 1
## 2 0 1 1 0 0 1
## 2 1 1 1 1 1 0
## 2 1 1 1 0 0 0
## 152 1 1 0 0 0 0
## 2 1 1 1 0 0 0
## 3 1 1 1 0 0 0
## 68 0 1 1 0 0 0
## 1 1 0 0 1 1 0
## 2 1 0 0 1 1 0
## 1 1 0 0 1 0 0
## 3 0 0 0 1 1 0
## 1 0 0 1 1 1 0
## 2 0 0 1 1 0 0
## 80 1 0 1 0 0 0
## 1 0 0 1 0 1 0
## 12 1 1 0 1 1 0
## 1 1 1 1 1 1 0
## 3 0 1 1 1 1 0
## 1 1 1 1 1 1 0
## 1 1 1 1 1 1 0
## 20 1 0 1 1 1 0
## 1 0 0 1 0 0 1
## 1 0 1 1 0 0 0
## 12 0 1 0 0 0 0
## 4 0 1 1 0 0 0
## 1 0 1 1 0 0 0
## 1 0 0 1 1 0 0
## 1 1 0 1 0 0 0
## 16 1 0 0 0 0 0
## 2 1 0 1 0 0 0
## 1 1 0 1 0 0 0
## 25 0 0 1 0 0 0
## 1 1 1 0 1 1 0
## 3 1 1 1 0 0 0
## 1 1 1 1 1 1 0
## 1 0 1 1 1 1 0
## 8 1 0 0 1 1 0
## 1 1 0 1 1 1 0
## 4 0 0 1 1 1 0
## 3 0 0 0 0 0 0
## 1 0 0 1 0 0 0
## 2 1 1 0 0 0 0
## 1 1 0 0 1 1 0
## 1 1 0 1 0 0 0
## 1 0 0 0 0 0 0
## 1 0 1 0 0 0 0
## 1 0 1 1 0 0 0
## 2 0 0 1 0 0 0
## 385 1864 1992 2008 2116 5877
## GV_DVLONG GV_ENERGY
## 10930 1 1 0
## 5 1 1 1
## 2 1 1 1
## 250 1 1 1
## 97 1 1 1
## 1 1 1 1
## 138 1 1 1
## 51 1 1 1
## 64 1 1 1
## 385 1 1 1
## 1 1 1 2
## 1 1 1 2
## 4 1 1 2
## 1 1 1 2
## 2 1 1 2
## 4 1 1 2
## 2 1 1 2
## 1122 1 1 2
## 1 1 1 2
## 1 1 1 2
## 7 1 1 2
## 1 1 1 2
## 5 1 1 2
## 7 1 1 2
## 77 1 1 2
## 2436 0 0 3
## 25 1 1 3
## 20 1 1 3
## 5 1 1 3
## 48 1 1 3
## 24 1 1 3
## 4 1 1 3
## 1 1 1 3
## 3 1 1 3
## 2 1 1 3
## 1 1 1 3
## 8 1 1 3
## 15 0 0 4
## 1 0 0 4
## 1221 0 0 4
## 35 0 0 4
## 30 0 0 4
## 69 0 0 4
## 13 0 0 4
## 2 1 1 4
## 1 1 1 4
## 969 0 0 4
## 4 1 1 4
## 15 1 1 4
## 3 0 0 5
## 2 0 0 5
## 8 0 0 5
## 16 0 0 5
## 22 0 0 5
## 3 0 0 5
## 7 0 0 5
## 1 0 0 5
## 8 0 0 5
## 249 0 0 5
## 1 0 0 5
## 4 0 0 5
## 210 0 0 5
## 25 0 0 5
## 4 0 0 5
## 24 0 0 5
## 6 0 0 5
## 41 0 0 5
## 1 1 1 5
## 2 1 1 5
## 2 0 0 5
## 2 0 0 6
## 152 0 0 6
## 2 0 0 6
## 3 0 0 6
## 68 0 0 6
## 1 0 0 6
## 2 0 0 6
## 1 0 0 6
## 3 0 0 6
## 1 0 0 6
## 2 0 0 6
## 80 0 0 6
## 1 0 0 6
## 12 0 0 6
## 1 0 0 6
## 3 0 0 6
## 1 0 0 6
## 1 0 0 6
## 20 0 0 6
## 1 1 1 6
## 1 0 0 7
## 12 0 0 7
## 4 0 0 7
## 1 0 0 7
## 1 0 0 7
## 1 0 0 7
## 16 0 0 7
## 2 0 0 7
## 1 0 0 7
## 25 0 0 7
## 1 0 0 7
## 3 0 0 7
## 1 0 0 7
## 1 0 0 7
## 8 0 0 7
## 1 0 0 7
## 4 0 0 7
## 3 0 0 8
## 1 0 0 8
## 2 0 0 8
## 1 0 0 8
## 1 0 0 8
## 1 0 0 9
## 1 0 0 9
## 1 0 0 9
## 2 0 0 9
## 5877 5877 26723
Use mice with Decision Tree to impute missing data.
# Single imputation (m = 1) of every incomplete variable with CART.
# NOTE(review): no seed is passed (the summary reports "Random generator seed
# value: NA"), so the imputed values are presumably not reproducible between
# runs -- consider supplying `seed`.
imp.data <- mice(
  data,
  m = 1,
  method = "cart",
  printFlag = FALSE
)
summary(imp.data)
## Multiply imputed data set
## Call:
## mice(data = data, m = 1, method = "cart", printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 39 5877 5877 5877 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1992 221 0 14
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 2116 0 385 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 2008 0 219 8 1864
## GV_FOOTPRINT
## 223
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "cart" "cart" "cart" "cart" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "cart" "cart" "cart" "cart" "cart"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "cart" "cart" "cart" "cart" "cart"
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "cart" "cart" "cart" "cart" "cart"
## GV_FOOTPRINT
## "cart"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
For comparison, impute the numeric variables with their mean value (factor variables are still imputed with a decision tree).
# Baseline for comparison: same single imputation, but the per-type default
# methods are overridden so that numeric variables get "mean" while the
# factor variables keep "cart" (see the "Imputation methods" in the summary).
# NOTE(review): no seed is passed here either.
imp.data_raw_mean <- mice(
  data,
  m = 1,
  defaultMethod = c("mean", "cart", "cart", "cart"),
  printFlag = FALSE
)
summary(imp.data_raw_mean)
## Multiply imputed data set
## Call:
## mice(data = data, m = 1, defaultMethod = c("mean", "cart", "cart",
## "cart"), printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 39 5877 5877 5877 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1992 221 0 14
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 2116 0 385 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 2008 0 219 8 1864
## GV_FOOTPRINT
## 223
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "mean" "mean" "mean" "mean" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "" "mean" "mean" "" "mean"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "" "mean" "" "cart" ""
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "mean" "" "mean" "mean" "mean"
## GV_FOOTPRINT
## "mean"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
Plot GV_DVLAT and VE_PDOF_TR for imputed data using Decision Tree.
# Scatter plot of GV_DVLAT against VE_PDOF_TR from the CART imputation (imp.data).
xyplot(imp.data, GV_DVLAT ~ VE_PDOF_TR)
Plot GV_DVLONG and VE_PDOF_TR for imputed data using Decision Tree.
# Scatter plot of GV_DVLONG against VE_PDOF_TR from the CART imputation (imp.data).
xyplot(imp.data, GV_DVLONG ~ VE_PDOF_TR)
Plot GV_DVLAT and GV_FOOTPRINT for imputed data using Decision Tree.
# Scatter plot of GV_DVLAT against GV_FOOTPRINT from the CART imputation (imp.data).
xyplot(imp.data, GV_DVLAT ~ GV_FOOTPRINT)
Plot GV_DVLONG and GV_FOOTPRINT for imputed data using Decision Tree.
# Scatter plot of GV_DVLONG against GV_FOOTPRINT from the CART imputation (imp.data).
xyplot(imp.data, GV_DVLONG ~ GV_FOOTPRINT)
Plot GV_DVLAT and VE_PDOF_TR for imputed data using Mean value.
# Scatter plot of GV_DVLAT against VE_PDOF_TR from the mean-based imputation (imp.data_raw_mean).
xyplot(imp.data_raw_mean, GV_DVLAT ~ VE_PDOF_TR)
Plot GV_DVLONG and VE_PDOF_TR for imputed data using Mean value.
# Scatter plot of GV_DVLONG against VE_PDOF_TR from the mean-based imputation (imp.data_raw_mean).
xyplot(imp.data_raw_mean, GV_DVLONG ~ VE_PDOF_TR)
Plot GV_DVLAT and GV_FOOTPRINT for imputed data using Mean value.
# Scatter plot of GV_DVLAT against GV_FOOTPRINT from the mean-based imputation (imp.data_raw_mean).
xyplot(imp.data_raw_mean, GV_DVLAT ~ GV_FOOTPRINT)
Plot GV_DVLONG and GV_FOOTPRINT for imputed data using Mean value.
# Scatter plot of GV_DVLONG against GV_FOOTPRINT from the mean-based imputation (imp.data_raw_mean).
xyplot(imp.data_raw_mean, GV_DVLONG ~ GV_FOOTPRINT)
Save the imputed data.
# Extract the first (and, with m = 1, only) completed data set from the CART
# imputation; the mean-based imputation is not saved.
data_complete <- complete(imp.data, action = 1)

# The relative path is resolved against the working directory set when the
# data was loaded. row.names = TRUE is write.csv's default, spelled out here:
# the file gets a leading, unnamed row-index column.
# NOTE(review): the input CSV appears to have no such index column -- confirm
# that downstream readers expect one.
write.csv(
  data_complete,
  file = "./vehicle_safety_imputed_data.csv",
  row.names = TRUE
)